home *** CD-ROM | disk | FTP | other *** search
- #ifndef lint
- static char Rcs_Id[] =
- "$Id: tree.c,v 1.56 1995/01/08 23:23:49 geoff Exp $";
- #endif
-
- /*
- * tree.c - a hash style dictionary for user's personal words
- *
- * Pace Willisson, 1983
- * Hash support added by Geoff Kuenning, 1987
- *
- * Copyright 1987, 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All modifications to the source code must be clearly marked as
- * such. Binary redistributions based on modified source code
- * must be clearly marked as modified versions in the documentation
- * and/or other materials provided with the distribution.
- * 4. All advertising materials mentioning features or use of this software
- * must display the following acknowledgment:
- * This product includes software developed by Geoff Kuenning and
- * other unpaid contributors.
- * 5. The name of Geoff Kuenning may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
- /*
- * $Log: tree.c,v $
- * Revision 1.56 1995/01/08 23:23:49 geoff
- * Support PDICTHOME for DOS purposes.
- *
- * Revision 1.55 1994/10/25 05:46:27 geoff
- * Fix a comment that looked to some compilers like it might be nested.
- *
- * Revision 1.54 1994/01/25 07:12:15 geoff
- * Get rid of all old RCS log lines in preparation for the 3.1 release.
- *
- */
-
- #include <ctype.h>
- #include <errno.h>
- #include "config.h"
- #include "ispell.h"
- #include "proto.h"
- #include "msgs.h"
-
- void treeinit P ((char * p, char * LibDict));
- static FILE * trydict P ((char * dictname, char * home,
- char * prefix, char * suffix));
- static void treeload P ((FILE * dictf));
- void treeinsert P ((char * word, int wordlen, int keep));
- static struct dent * tinsert P ((struct dent * proto));
- struct dent * treelookup P ((ichar_t * word));
- #if SORTPERSONAL != 0
- static int pdictcmp P ((struct dent ** enta, struct dent **entb));
- #endif /* SORTPERSONAL != 0 */
- void treeoutput P ((void));
- VOID * mymalloc P ((unsigned int size));
- void myfree P ((VOID * ptr));
- #ifdef REGEX_LOOKUP
- char * do_regex_lookup P ((char * expr, int whence));
- #endif /* REGEX_LOOKUP */
-
- static int cantexpand = 0; /* NZ if an expansion fails */
- static struct dent * pershtab; /* Aux hash table for personal dict */
- static int pershsize = 0; /* Space available in aux hash table */
- static int hcount = 0; /* Number of items in hash table */
-
- /*
- * Hash table sizes. Prime is probably a good idea, though in truth I
- * whipped the algorithm up on the spot rather than looking it up, so
- * who knows what's really best? If we overflow the table, we just
- * use a double-and-add-1 algorithm.
- */
- static int goodsizes[] =
- {
- 53, 223, 907, 3631
- };
-
- static char personaldict[MAXPATHLEN];
- static FILE * dictf;
- static newwords = 0;
-
- void treeinit (p, LibDict)
- char * p; /* Value specified in -p switch */
- char * LibDict; /* Root of default dict name */
- {
- int abspath; /* NZ if p is abs path name */
- char * h; /* Home directory name */
- char seconddict[MAXPATHLEN]; /* Name of secondary dict */
- FILE * secondf; /* Access to second dict file */
-
- /*
- ** If -p was not specified, try to get a default name from the
- ** environment. After this point, if p is null, the the value in
- ** personaldict is the only possible name for the personal dictionary.
- ** If p is non-null, then there is a possibility that we should
- ** prepend HOME to get the correct dictionary name.
- */
- if (p == NULL)
- p = getenv (PDICTVAR);
- /*
- ** if p exists and begins with '/' we don't really need HOME,
- ** but it's not very likely that HOME isn't set anyway (on non-DOS
- ** systems).
- */
- if ((h = getenv (HOME)) == NULL)
- {
- #ifdef PDICTHOME
- h = PDICTHOME;
- #else /* PDICTHOME */
- return;
- #endif /* PDICTHOME */
- }
-
- if (p == NULL)
- {
- /*
- * No -p and no PDICTVAR. We will use LibDict and DEFPAFF to
- * figure out the name of the personal dictionary and where it
- * is. The rules are as follows:
- *
- * (1) If there is a local dictionary and a HOME dictionary,
- * both are loaded, but changes are saved in the local one.
- * The dictionary to save changes in is named "personaldict".
- * (2) Dictionaries named after the affix file take precedence
- * over dictionaries with the default suffix (DEFPAFF).
- * (3) Dictionaries named with the new default names
- * (DEFPDICT/DEFPAFF) take precedence over the old ones
- * (OLDPDICT/OLDPAFF).
- * (4) Dictionaries aren't combined unless they follow the same
- * naming scheme.
- * (5) If no dictionary can be found, a new one is created in
- * the home directory, named after DEFPDICT and the affix
- * file.
- */
- #ifdef AMIGA /* Personal dicts are placed with the primary dicts */
- dictf = trydict (personaldict, LIBDIR, DEFPDICT, LibDict);
- if (personaldict[0] == '\0')
- sprintf (personaldict, "%s/%s%s", LIBDIR, DEFPDICT, LibDict);
- #else
- dictf = trydict (personaldict, (char *) NULL, DEFPDICT, LibDict);
- secondf = trydict (seconddict, h, DEFPDICT, LibDict);
- if (dictf == NULL && secondf == NULL)
- {
- dictf = trydict (personaldict, (char *) NULL, DEFPDICT, DEFPAFF);
- secondf = trydict (seconddict, h, DEFPDICT, DEFPAFF);
- }
- if (dictf == NULL && secondf == NULL)
- {
- dictf = trydict (personaldict, (char *) NULL, OLDPDICT, LibDict);
- secondf = trydict (seconddict, h, OLDPDICT, LibDict);
- }
- if (dictf == NULL && secondf == NULL)
- {
- dictf = trydict (personaldict, (char *) NULL, OLDPDICT, OLDPAFF);
- secondf = trydict (seconddict, h, OLDPDICT, OLDPAFF);
- }
- if (personaldict[0] == '\0')
- {
- if (seconddict[0] != '\0')
- (void) strcpy (personaldict, seconddict);
- else
- (void) sprintf (personaldict, "%s/%s%s", h, DEFPDICT, LibDict);
- }
- #endif
- if (dictf != NULL)
- {
- treeload (dictf);
- (void) fclose (dictf);
- }
- #ifndef AMIGA /* Only one dict */
- if (secondf != NULL)
- {
- treeload (secondf);
- (void) fclose (secondf);
- }
- #endif
- }
- else
- {
- /*
- ** Figure out if p is an absolute path name. Note that beginning
- ** with "./" and "../" is considered an absolute path, since this
- ** still means we can't prepend HOME.
- */
- #ifndef AMIGA /* Always absolute path on Amiga */
- abspath = (*p == '/' || strncmp (p, "./", 2) == 0
- || strncmp (p, "../", 3) == 0);
- if (abspath)
- {
- #endif
- (void) strcpy (personaldict, p);
- if ((dictf = fopen (personaldict, "r")) != NULL)
- {
- treeload (dictf);
- (void) fclose (dictf);
- }
- #ifndef AMIGA
- }
- else
- {
- /*
- ** The user gave us a relative pathname. We will try it
- ** locally, and if that doesn't work, we'll try the home
- ** directory. If neither exists, it will be created in
- ** the home directory if words are added.
- */
- (void) strcpy (personaldict, p);
- if ((dictf = fopen (personaldict, "r")) != NULL)
- {
- treeload (dictf);
- (void) fclose (dictf);
- }
- else if (!abspath)
- {
- /* Try the home */
- (void) sprintf (personaldict, "%s/%s", h, p);
- if ((dictf = fopen (personaldict, "r")) != NULL)
- {
- treeload (dictf);
- (void) fclose (dictf);
- }
- }
- /*
- * If dictf is null, we couldn't open the dictionary
- * specified in the -p switch. Complain.
- */
- if (dictf == NULL)
- {
- (void) fprintf (stderr, CANT_OPEN, p);
- perror ("");
- return;
- }
- }
- #endif
- }
-
- if (!lflag && !aflag
- && access (personaldict, 2) < 0 && errno != ENOENT)
- {
- (void) fprintf (stderr, TREE_C_CANT_UPDATE, personaldict);
- (void) sleep ((unsigned) 2);
- }
- }
-
- /*
- * Try to open a dictionary. As a side effect, leaves the dictionary
- * name in "filename" if one is found, and leaves a null string there
- * otherwise.
- */
- static FILE * trydict (filename, home, prefix, suffix)
- char * filename; /* Where to store the file name */
- char * home; /* Home directory */
- char * prefix; /* Prefix for dictionary */
- char * suffix; /* Suffix for dictionary */
- {
- FILE * dictf; /* Access to dictionary file */
-
- if (home == NULL)
- (void) sprintf (filename, "%s%s", prefix, suffix);
- else
- (void) sprintf (filename, "%s/%s%s", home, prefix, suffix);
- dictf = fopen (filename, "r");
- if (dictf == NULL)
- filename[0] = '\0';
- return dictf;
- }
-
- static void treeload (loadfile)
- register FILE * loadfile; /* File to load words from */
- {
- char buf[BUFSIZ]; /* Buffer for reading pers dict */
-
- while (fgets (buf, sizeof buf, loadfile) != NULL)
- treeinsert (buf, sizeof buf, 1);
- newwords = 0;
- }
-
- void treeinsert (word, wordlen, keep)
- char * word; /* Word to insert - must be canonical */
- int wordlen; /* Length of the word buffer */
- int keep;
- {
- register int i;
- struct dent wordent;
- register struct dent * dp;
- struct dent * olddp;
- #ifndef NO_CAPITALIZATION_SUPPORT
- struct dent * newdp;
- #endif
- struct dent * oldhtab;
- int oldhsize;
- ichar_t nword[INPUTWORDLEN + MAXAFFIXLEN];
- #ifndef NO_CAPITALIZATION_SUPPORT
- int isvariant;
- #endif
-
- /*
- * Expand hash table when it is MAXPCT % full.
- */
- if (!cantexpand && (hcount * 100) / MAXPCT >= pershsize)
- {
- oldhsize = pershsize;
- oldhtab = pershtab;
- for (i = 0; i < sizeof goodsizes / sizeof (goodsizes[0]); i++)
- {
- if (goodsizes[i] > pershsize)
- break;
- }
- if (i >= sizeof goodsizes / sizeof goodsizes[0])
- pershsize += pershsize + 1;
- else
- pershsize = goodsizes[i];
- pershtab =
- (struct dent *) calloc ((unsigned) pershsize, sizeof (struct dent));
- if (pershtab == NULL)
- {
- (void) fprintf (stderr, TREE_C_NO_SPACE);
- /*
- * Try to continue anyway, since our overflow
- * algorithm can handle an overfull (100%+) table,
- * and the malloc very likely failed because we
- * already have such a huge table, so small mallocs
- * for overflow entries will still work.
- */
- if (oldhtab == NULL)
- exit (1); /* No old table, can't go on */
- (void) fprintf (stderr, TREE_C_TRY_ANYWAY);
- cantexpand = 1; /* Suppress further messages */
- pershsize = oldhsize; /* Put things back */
- pershtab = oldhtab; /* ... */
- newwords = 1; /* And pretend it worked */
- }
- else
- {
- /*
- * Re-insert old entries into new table
- */
- for (i = 0; i < oldhsize; i++)
- {
- dp = &oldhtab[i];
- if (dp->flagfield & USED)
- {
- #ifdef NO_CAPITALIZATION_SUPPORT
- (void) tinsert (dp);
- #else
- newdp = tinsert (dp);
- isvariant = (dp->flagfield & MOREVARIANTS);
- #endif
- dp = dp->next;
- #ifdef NO_CAPITALIZATION_SUPPORT
- while (dp != NULL)
- {
- (void) tinsert (dp);
- olddp = dp;
- dp = dp->next;
- free ((char *) olddp);
- }
- #else
- while (dp != NULL)
- {
- if (isvariant)
- {
- isvariant = dp->flagfield & MOREVARIANTS;
- olddp = newdp->next;
- newdp->next = dp;
- newdp = dp;
- dp = dp->next;
- newdp->next = olddp;
- }
- else
- {
- isvariant = dp->flagfield & MOREVARIANTS;
- newdp = tinsert (dp);
- olddp = dp;
- dp = dp->next;
- free ((char *) olddp);
- }
- }
- #endif
- }
- }
- if (oldhtab != NULL)
- free ((char *) oldhtab);
- }
- }
-
- /*
- ** We're ready to do the insertion. Start by creating a sample
- ** entry for the word.
- */
- if (makedent (word, wordlen, &wordent) < 0)
- return; /* Word must be too big or something */
- if (keep)
- wordent.flagfield |= KEEP;
- /*
- ** Now see if word or a variant is already in the table. We use the
- ** capitalized version so we'll find the header, if any.
- **/
- (void) strtoichar (nword, word, sizeof nword, 1);
- upcase (nword);
- if ((dp = lookup (nword, 1)) != NULL)
- {
- /* It exists. Combine caps and set the keep flag. */
- if (combinecaps (dp, &wordent) < 0)
- {
- free (wordent.word);
- return;
- }
- }
- else
- {
- /* It's new. Insert the word. */
- dp = tinsert (&wordent);
- #ifndef NO_CAPITALIZATION_SUPPORT
- if (captype (dp->flagfield) == FOLLOWCASE)
- (void) addvheader (dp);
- #endif
- }
- newwords |= keep;
- }
-
- static struct dent * tinsert (proto)
- struct dent * proto; /* Prototype entry to copy */
- {
- ichar_t iword[INPUTWORDLEN + MAXAFFIXLEN];
- register int hcode;
- register struct dent * hp; /* Next trial entry in hash table */
- register struct dent * php; /* Prev. value of hp, for chaining */
-
- if (strtoichar (iword, proto->word, sizeof iword, 1))
- (void) fprintf (stderr, WORD_TOO_LONG (proto->word));
- #ifdef NO_CAPITALIZATION_SUPPORT
- upcase (iword);
- #endif
- hcode = hash (iword, pershsize);
- php = NULL;
- hp = &pershtab[hcode];
- if (hp->flagfield & USED)
- {
- while (hp != NULL)
- {
- php = hp;
- hp = hp->next;
- }
- hp = (struct dent *) calloc (1, sizeof (struct dent));
- if (hp == NULL)
- {
- (void) fprintf (stderr, TREE_C_NO_SPACE);
- exit (1);
- }
- }
- *hp = *proto;
- if (php != NULL)
- php->next = hp;
- hp->next = NULL;
- return hp;
- }
-
- struct dent * treelookup (word)
- register ichar_t * word;
- {
- register int hcode;
- register struct dent * hp;
- char chword[INPUTWORDLEN + MAXAFFIXLEN];
-
- if (pershsize <= 0)
- return NULL;
- (void) ichartostr (chword, word, sizeof chword, 1);
- hcode = hash (word, pershsize);
- hp = &pershtab[hcode];
- while (hp != NULL && (hp->flagfield & USED))
- {
- if (strcmp (chword, hp->word) == 0)
- break;
- #ifndef NO_CAPITALIZATION_SUPPORT
- while (hp->flagfield & MOREVARIANTS)
- hp = hp->next;
- #endif
- hp = hp->next;
- }
- if (hp != NULL && (hp->flagfield & USED))
- return hp;
- else
- return NULL;
- }
-
- #if SORTPERSONAL != 0
- /* Comparison routine for sorting the personal dictionary with qsort */
- static int pdictcmp (enta, entb)
- struct dent ** enta;
- struct dent ** entb;
- {
-
- /* The parentheses around *enta and *entb below are NECESSARY!
- ** Otherwise the compiler reads it as *(enta->word), or
- ** enta->word[0], which is illegal (but pcc takes it and
- ** produces wrong code).
- **/
- return casecmp ((*enta)->word, (*entb)->word, 1);
- }
- #endif
-
- void treeoutput ()
- {
- register struct dent * cent; /* Current entry */
- register struct dent * lent; /* Linked entry */
- #if SORTPERSONAL != 0
- int pdictsize; /* Number of entries to write */
- struct dent ** sortlist; /* List of entries to be sorted */
- register struct dent ** sortptr; /* Handy pointer into sortlist */
- #endif
- register struct dent * ehtab; /* End of pershtab, for fast looping */
-
- if (newwords == 0)
- return;
-
- printf("Saving to '%s'", personaldict);
- fflush(stdout);
-
- if ((dictf = fopen (personaldict, "w")) == NULL)
- {
- (void) fprintf (stderr, CANT_CREATE, personaldict);
- return;
- }
-
- #if SORTPERSONAL != 0
- /*
- ** If we are going to sort the personal dictionary, we must know
- ** how many items are going to be sorted.
- */
- pdictsize = 0;
- if (hcount >= SORTPERSONAL)
- sortlist = NULL;
- else
- {
- for (cent = pershtab, ehtab = pershtab + pershsize;
- cent < ehtab;
- cent++)
- {
- for (lent = cent; lent != NULL; lent = lent->next)
- {
- if ((lent->flagfield & (USED | KEEP)) == (USED | KEEP))
- pdictsize++;
- #ifndef NO_CAPITALIZATION_SUPPORT
- while (lent->flagfield & MOREVARIANTS)
- lent = lent->next;
- #endif
- }
- }
- for (cent = hashtbl, ehtab = hashtbl + hashsize;
- cent < ehtab;
- cent++)
- {
- if ((cent->flagfield & (USED | KEEP)) == (USED | KEEP))
- {
- /*
- ** We only want to count variant headers
- ** and standalone entries. These happen
- ** to share the characteristics in the
- ** test below. This test will appear
- ** several more times in this routine.
- */
- #ifndef NO_CAPITALIZATION_SUPPORT
- if (captype (cent->flagfield) != FOLLOWCASE
- && cent->word != NULL)
- #endif
- pdictsize++;
- }
- }
- sortlist = (struct dent **) malloc (pdictsize * sizeof (struct dent));
- }
- if (sortlist == NULL)
- {
- #endif
- for (cent = pershtab, ehtab = pershtab + pershsize;
- cent < ehtab;
- cent++)
- {
- for (lent = cent; lent != NULL; lent = lent->next)
- {
- if ((lent->flagfield & (USED | KEEP)) == (USED | KEEP))
- {
- toutent (dictf, lent, 1);
- #ifndef NO_CAPITALIZATION_SUPPORT
- while (lent->flagfield & MOREVARIANTS)
- lent = lent->next;
- #endif
- }
- }
- }
- for (cent = hashtbl, ehtab = hashtbl + hashsize;
- cent < ehtab;
- cent++)
- {
- if ((cent->flagfield & (USED | KEEP)) == (USED | KEEP))
- {
- #ifndef NO_CAPITALIZATION_SUPPORT
- if (captype (cent->flagfield) != FOLLOWCASE
- && cent->word != NULL)
- #endif
- toutent (dictf, cent, 1);
- }
- }
- #if SORTPERSONAL != 0
- return;
- }
- /*
- ** Produce dictionary in sorted order. We used to do this
- ** destructively, but that turns out to fail because in some modes
- ** the dictionary is written more than once. So we build an
- ** auxiliary pointer table (in sortlist) and sort that. This
- ** is faster anyway, though it uses more memory.
- */
- sortptr = sortlist;
- for (cent = pershtab, ehtab = pershtab + pershsize; cent < ehtab; cent++)
- {
- for (lent = cent; lent != NULL; lent = lent->next)
- {
- if ((lent->flagfield & (USED | KEEP)) == (USED | KEEP))
- {
- *sortptr++ = lent;
- #ifndef NO_CAPITALIZATION_SUPPORT
- while (lent->flagfield & MOREVARIANTS)
- lent = lent->next;
- #endif
- }
- }
- }
- for (cent = hashtbl, ehtab = hashtbl + hashsize; cent < ehtab; cent++)
- {
- if ((cent->flagfield & (USED | KEEP)) == (USED | KEEP))
- {
- #ifndef NO_CAPITALIZATION_SUPPORT
- if (captype (cent->flagfield) != FOLLOWCASE
- && cent->word != NULL)
- #endif
- *sortptr++ = cent;
- }
- }
- /* Sort the list */
- qsort ((char *) sortlist, (unsigned) pdictsize,
- sizeof (sortlist[0]),
- (int (*) P ((const void *, const void *))) pdictcmp);
- /* Write it out */
- for (sortptr = sortlist; --pdictsize >= 0; )
- toutent (dictf, *sortptr++, 1);
- free ((char *) sortlist);
- #endif
-
- newwords = 0;
-
- (void) fclose (dictf);
- }
-
- VOID * mymalloc (size)
- unsigned int size;
- {
-
- return malloc ((unsigned) size);
- }
-
- void myfree (ptr)
- VOID * ptr;
- {
- if (hashstrings != NULL && (char *) ptr >= hashstrings
- && (char *) ptr <= hashstrings + hashheader.stringsize)
- return; /* Can't free stuff in hashstrings */
- free (ptr);
- }
-
- #ifdef REGEX_LOOKUP
-
- /* check the hashed dictionary for words matching the regex. return the */
- /* a matching string if found else return NULL */
- char * do_regex_lookup (expr, whence)
- char * expr; /* regular expression to use in the match */
- int whence; /* 0 = start at the beg with new regx, else */
- /* continue from cur point w/ old regex */
- {
- static struct dent * curent;
- static int curindex;
- static struct dent * curpersent;
- static int curpersindex;
- static char * cmp_expr;
- char dummy[INPUTWORDLEN + MAXAFFIXLEN];
- ichar_t * is;
-
- if (whence == 0)
- {
- is = strtosichar (expr, 0);
- upcase (is);
- expr = ichartosstr (is, 1);
- cmp_expr = REGCMP (expr);
- curent = hashtbl;
- curindex = 0;
- curpersent = pershtab;
- curpersindex = 0;
- }
-
- /* search the dictionary until the word is found or the words run out */
- for ( ; curindex < hashsize; curent++, curindex++)
- {
- if (curent->word != NULL
- && REGEX (cmp_expr, curent->word, dummy) != NULL)
- {
- curindex++;
- /* Everybody's gotta write a wierd expression once in a while! */
- return curent++->word;
- }
- }
- /* Try the personal dictionary too */
- for ( ; curpersindex < pershsize; curpersent++, curpersindex++)
- {
- if ((curpersent->flagfield & USED) != 0
- && curpersent->word != NULL
- && REGEX (cmp_expr, curpersent->word, dummy) != NULL)
- {
- curpersindex++;
- /* Everybody's gotta write a wierd expression once in a while! */
- return curpersent++->word;
- }
- }
- return NULL;
- }
- #endif /* REGEX_LOOKUP */
-